This project predicts the stock price of Tencent Holdings Limited (0700.HK) using an LSTM (long short-term memory) network.
The data was downloaded from Yahoo Finance (https://finance.yahoo.com/quote/0700.HK?p=0700.HK&.tsrc=fin-srch) on 28 Dec 2022.
import pandas as pd
import plotly.graph_objects as go
from plotly import subplots
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM
# Load the 0700.HK price history, index the rows by the 'Date' column, and
# discard every row that contains a missing value.
df = pd.read_csv('0700.HK.csv').set_index('Date').dropna()
# Bare expression: displays the cleaned frame when run as a notebook cell.
df
| Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|
| Date | ||||||
| 2004-06-16 | 0.875000 | 0.925000 | 0.815000 | 0.830000 | 0.798364 | 2.198875e+09 |
| 2004-06-17 | 0.830000 | 0.875000 | 0.825000 | 0.845000 | 0.812793 | 4.190075e+08 |
| 2004-06-18 | 0.840000 | 0.850000 | 0.790000 | 0.805000 | 0.774317 | 1.829900e+08 |
| 2004-06-21 | 0.820000 | 0.825000 | 0.790000 | 0.800000 | 0.769508 | 1.140850e+08 |
| 2004-06-22 | 0.800000 | 0.800000 | 0.800000 | 0.800000 | 0.769508 | 0.000000e+00 |
| ... | ... | ... | ... | ... | ... | ... |
| 2022-12-20 | 318.799988 | 318.799988 | 306.399994 | 309.399994 | 309.399994 | 2.905642e+07 |
| 2022-12-21 | 311.000000 | 313.200012 | 308.000000 | 311.000000 | 311.000000 | 1.532387e+07 |
| 2022-12-22 | 319.000000 | 325.799988 | 316.399994 | 323.799988 | 323.799988 | 2.425175e+07 |
| 2022-12-23 | 318.000000 | 326.000000 | 317.200012 | 320.200012 | 320.200012 | 1.550270e+07 |
| 2022-12-28 | 325.000000 | 329.600006 | 321.799988 | 326.200012 | 326.200012 | 2.418839e+07 |
4583 rows × 6 columns
# One line trace per price column. Open/High/Low start hidden
# (visible="legendonly") and can be toggled from the legend; Close is drawn
# immediately.
line1, line2, line3 = (
    go.Scatter(x=df.index, y=df[column], mode='lines', name=column,
               visible="legendonly")
    for column in ('Open', 'High', 'Low')
)
line4 = go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Close')

layout = go.Layout(
    title='Tencent Stock Price (click legend to see graph)',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Price (HKD)'},
    height=800,
)
figure = go.Figure(data=[line1, line2, line3, line4], layout=layout)
figure.show()
# Model only the closing price. The double brackets keep `data` a one-column
# DataFrame, i.e. the 2-D input the scaler works on.
data = df[['Close']]

# Fit the scaler on the training portion only (the first 80% of the rows, the
# same fraction the train/test split below uses). Fitting on the whole series
# would leak the test period's minimum/maximum into training and make the
# evaluation optimistic. As a consequence, test-period prices outside the
# training range are scaled to values outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data.iloc[:int(len(data) * 0.8)])
scaled_data = scaler.transform(data)
# Bare expression: displays the scaled array when run as a notebook cell.
scaled_data
array([[1.95868481e-04],
[2.15455329e-04],
[1.63223734e-04],
...,
[4.21926808e-01],
[4.17225996e-01],
[4.25060735e-01]])
# 80/20 split by row position: the first 80% of the scaled rows are used for
# training, the remaining rows for testing.
split_at = int(len(scaled_data) * 0.8)
train, test = scaled_data[:split_at], scaled_data[split_at:]

# Number of consecutive rows in each input window.
step = 60
def transform_data(data, stride):
    """Build sliding-window samples and next-step targets from a 2-D array.

    For every start position ``i`` the sample is the ``stride`` consecutive
    rows ``data[i : i + stride]`` and the target is column 0 of the row that
    immediately follows the window, ``data[i + stride, 0]``.

    Args:
        data: 2-D array-like indexed as ``data[row, column]``.
        stride: Number of rows in each window (a window length, despite the
            parameter name).

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x`` has shape
        ``(len(data) - stride, stride, n_columns)`` and ``y`` has shape
        ``(len(data) - stride,)``. When ``data`` has too few rows to form a
        single window plus target, both arrays are empty but keep those
        trailing dimensions, so downstream shape checks still hold.
    """
    data = np.asarray(data)
    n_windows = len(data) - stride
    if n_windows <= 0:
        # Not enough rows for even one (window, target) pair: return empty
        # arrays with the same rank as the non-empty case.
        empty_x = np.empty((0, stride) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_x, empty_y

    x = np.array([data[i:i + stride] for i in range(n_windows)])
    y = np.array([data[i + stride, 0] for i in range(n_windows)])
    return x, y
# Turn the training rows into (window, next value) pairs with `step` rows per
# window.
train_x, train_y = transform_data(data=train, stride=step)
# Bare tuple: displays both arrays when run as a notebook cell.
train_x, train_y
(array([[[1.95868481e-04],
[2.15455329e-04],
[1.63223734e-04],
...,
[4.57026455e-05],
[6.52894936e-05],
[9.79342404e-05]],
[[2.15455329e-04],
[1.63223734e-04],
[1.56694785e-04],
...,
[6.52894936e-05],
[9.79342404e-05],
[8.48763417e-05]],
[[1.63223734e-04],
[1.56694785e-04],
[1.56694785e-04],
...,
[9.79342404e-05],
[8.48763417e-05],
[6.52894936e-05]],
...,
[[4.26888833e-01],
[4.28716931e-01],
[4.31589684e-01],
...,
[4.92700635e-01],
[4.90089055e-01],
[4.95573388e-01]],
[[4.28716931e-01],
[4.31589684e-01],
[4.19315228e-01],
...,
[4.90089055e-01],
[4.95573388e-01],
[5.00013066e-01]],
[[4.31589684e-01],
[4.19315228e-01],
[4.30022721e-01],
...,
[4.95573388e-01],
[5.00013066e-01],
[5.06803150e-01]]]),
array([8.48763417e-05, 6.52894936e-05, 7.83473923e-05, ...,
5.00013066e-01, 5.06803150e-01, 5.10198211e-01]))
# Two stacked LSTM layers followed by two dense layers. The first LSTM
# returns the full sequence so the second LSTM receives one vector per
# time step; the final Dense(1) emits a single value per window.
model = Sequential([
    LSTM(units=100, return_sequences=True, input_shape=(step, 1)),
    LSTM(units=100),
    Dense(25),
    Dense(1),
])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 60, 100) 40800
lstm_1 (LSTM) (None, 100) 80400
dense (Dense) (None, 25) 2525
dense_1 (Dense) (None, 1) 26
=================================================================
Total params: 123,751
Trainable params: 123,751
Non-trainable params: 0
_________________________________________________________________
# Train with the Adam optimizer on mean squared error for 3 epochs, using a
# batch size of 1 (one training window per batch).
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x=train_x, y=train_y, batch_size=1, epochs=3)
Epoch 1/3 3606/3606 [==============================] - 47s 12ms/step - loss: 3.9180e-04 Epoch 2/3 3606/3606 [==============================] - 47s 13ms/step - loss: 1.9317e-04 Epoch 3/3 3606/3606 [==============================] - 47s 13ms/step - loss: 1.2088e-04
<keras.callbacks.History at 0x1920d75b5e0>
# For every row in the test portion, take the `step` scaled rows that
# immediately precede it as the model input. The earliest windows therefore
# reach back into the end of the training portion.
test_start = int(len(scaled_data) * 0.8)
test_x = np.array([
    scaled_data[end - step:end]
    for end in range(test_start, len(scaled_data))
])

# Predict in scaled space, then map the predictions back to the original
# price scale.
predict = scaler.inverse_transform(model.predict(test_x))
29/29 [==============================] - 1s 16ms/step
# Root-mean-squared error of the test predictions.
# `predict` has already been inverse-transformed to the original price scale,
# while `test` still holds scaled values, so bring `test` back to the same
# scale before comparing. The errors are squared element-wise *before*
# averaging; squaring the mean error instead lets positive and negative
# errors cancel and does not yield an RMSE.
actual = scaler.inverse_transform(test)
rmse = np.sqrt(np.mean((predict - actual) ** 2))
# Bare expression: displays the value when run as a notebook cell.
rmse
438.88885110212226
# Collect the actual closing prices of the test portion next to the model's
# predictions. An explicit .copy() makes `predict_df` an independent frame,
# so adding a column no longer triggers pandas' SettingWithCopyWarning (the
# slice of `data` would otherwise be treated as a possible view).
predict_df = data[int(len(scaled_data) * 0.8):].copy()
# `predict` has one column (shape (n, 1)); flatten it to a 1-D column.
predict_df['Predict'] = np.ravel(predict)
# Bare expression: displays the frame when run as a notebook cell.
predict_df
C:\Users\YonkeeZ\AppData\Local\Temp\ipykernel_79796\1773367257.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| Close | Predict | |
|---|---|---|
| Date | ||
| 2019-04-12 | 393.799988 | 391.042084 |
| 2019-04-15 | 388.200012 | 393.433655 |
| 2019-04-16 | 393.600006 | 390.006592 |
| 2019-04-17 | 395.600006 | 393.010956 |
| 2019-04-18 | 391.600006 | 395.963013 |
| ... | ... | ... |
| 2022-12-20 | 309.399994 | 322.393433 |
| 2022-12-21 | 311.000000 | 316.028320 |
| 2022-12-22 | 323.799988 | 315.401642 |
| 2022-12-23 | 320.200012 | 325.126984 |
| 2022-12-28 | 326.200012 | 325.783051 |
917 rows × 2 columns
# Plot the full closing-price history (line4) together with the actual and
# predicted closing prices over the test portion.
line_test = go.Scatter(
    x=predict_df.index, y=predict_df['Close'], mode='lines', name='Test',
)
line_pred = go.Scatter(
    x=predict_df.index, y=predict_df['Predict'], mode='lines', name='Predict',
)

layout = go.Layout(
    title='Tencent Stock Price Predictions',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Price (HKD)'},
    height=800,
)
figure = go.Figure(data=[line4, line_test, line_pred], layout=layout)
figure.show()
# Predict the close that follows the last row of the data set: scale the
# final `step` closing prices, wrap them as a batch containing one window,
# run the model, and map the result back to the original price scale.
last = data.iloc[-step:]
last_scaled = scaler.transform(last)
last_x = np.array([last_scaled])
pred_next_day = scaler.inverse_transform(model.predict(last_x))
print(pred_next_day)
1/1 [==============================] - 0s 27ms/step [[329.3499]]
# Load the follow-up CSV and show the 'Close' value of its last row, to
# compare against the next-day prediction.
df2 = pd.read_csv('0700.HK_check_next_day.csv').set_index('Date')
df2['Close'].iloc[-1:]
Date 2022-12-29 335.200012 Name: Close, dtype: float64